#!/usr/bin/env python2
"""
string_ops_test.py: Tests for string_ops.py
"""
from __future__ import print_function

import unittest

from core import error
from osh import string_ops  # module under test


class LibStrTest(unittest.TestCase):
    """Unit tests for the UTF-8, whitespace and pattern helpers in string_ops."""

    def test_NextUtf8Char(self):
        """Walk forward through each input, recording every index returned.

        Each case is (expected results, input bytes).  When the last expected
        entry is a string, error.Strict must be raised at that step and its
        msg must equal that string.
        """
        CASES = [
            ([1, 3, 6, 10], '\x24\xC2\xA2\xE0\xA4\xB9\xF0\x90\x8D\x88'),
            ([
                1, 3,
                'UTF-8 decode: Bad encoding at offset 3 in string of 6 bytes'
            ], '\x24\xC2\xA2\xE0\xE0\xA4'),
            ([
                1, 3, 6,
                'UTF-8 decode: Bad encoding at offset 6 in string of 7 bytes'
            ], '\x24\xC2\xA2\xE0\xA4\xA4\xB9'),
            ([
                1, 3,
                'UTF-8 decode: Bad encoding at offset 3 in string of 4 bytes'
            ], '\x24\xC2\xA2\xFF'),
            ([
                1,
                'UTF-8 decode: Truncated bytes at offset 1 in string of 4 bytes'
            ], '\x24\xF0\x90\x8D'),
        ]
        for expected_indexes, input_str in CASES:
            print()
            print('NextUtf8Char case %r %r' % (expected_indexes, input_str))
            i = 0
            actual_indexes = []
            while True:
                try:
                    i = string_ops.NextUtf8Char(input_str, i)
                    actual_indexes.append(i)
                    if i >= len(input_str):
                        break
                except error.Strict as e:
                    # Record the message in place of an index and stop.
                    actual_indexes.append(e.msg)
                    break
            self.assertEqual(expected_indexes, actual_indexes)

    def test_DecodeNextUtf8Char(self):
        """Decode a 1-, 2-, 3- and 4-byte character while walking forward."""
        s = '\x61\xC3\x8A\xE1\x82\xA0\xF0\x93\x80\x80'
        codepoints = [0x61, 0xCA, 0x10A0, 0x13000]
        start = 0
        for expected in codepoints:
            end = string_ops.NextUtf8Char(s, start)
            # Keep the decoded value in its own name: reusing the loop
            # variable would make the assertion compare a value to itself.
            actual = string_ops.DecodeUtf8Char(s, start)
            self.assertEqual(expected, actual)
            start = end
        # The four characters together must account for every byte.
        self.assertEqual(len(s), start)

    def test_DecodePrevUtf8Char(self):
        """Decode the same four characters while walking backward."""
        s = '\x61\xC3\x8A\xE1\x82\xA0\xF0\x93\x80\x80'
        codepoints = [0x61, 0xCA, 0x10A0, 0x13000]
        end = len(s)
        for expected in reversed(codepoints):
            start = string_ops.PreviousUtf8Char(s, end)
            # Separate name for the decoded value; see the forward test.
            actual = string_ops.DecodeUtf8Char(s, start)
            self.assertEqual(expected, actual)
            end = start
        # Walking back over all four characters must land on offset 0.
        self.assertEqual(0, end)

    def test_DecodeUtf8CharError(self):
        """Malformed input at offset 0 must raise error.Expr with this msg."""
        CASES = [
            ('UTF-8 decode: Truncated bytes at offset 0 in string of 1 bytes',
             '\xC0'),
            ('UTF-8 decode: Bad encoding at offset 0 in string of 2 bytes',
             '\xC0\x01'),
            ('UTF-8 decode: Bad encoding at offset 0 in string of 1 bytes',
             '\xff'),
        ]
        for msg, input_str in CASES:
            with self.assertRaises(error.Expr) as ctx:
                string_ops.DecodeUtf8Char(input_str, 0)
            self.assertEqual(ctx.exception.msg, msg)

    def test_PreviousUtf8Char(self):
        """Walk backward through each input, recording every index returned.

        Same case layout as test_NextUtf8Char: a trailing string in the
        expected list is the msg of the error.Strict that must be raised.
        """
        # The error messages could probably be improved for more consistency
        # with NextUtf8Char, at the expense of more complexity.
        CASES = [
            ([6, 3, 1, 0], '\x24\xC2\xA2\xE0\xA4\xB9\xF0\x90\x8D\x88'),
            ([6, 3, 1, 'Invalid start of UTF-8 sequence'],
             '\xA2\xC2\xA2\xE0\xA4\xB9\xF0\x90\x8D\x88'),
            ([10, 'Invalid start of UTF-8 sequence'],
             '\xF0\x90\x8D\x88\x90\x8D\x88\x90\x8D\x88\x24'),
            ([3, 'Invalid start of UTF-8 sequence'], '\xF0\x90\x8D\x24'),
        ]
        for expected_indexes, input_str in CASES:
            print()
            print('PreviousUtf8Char case %r %r' %
                  (expected_indexes, input_str))
            i = len(input_str)
            actual_indexes = []
            while True:
                try:
                    i = string_ops.PreviousUtf8Char(input_str, i)
                    actual_indexes.append(i)
                    if i == 0:
                        break
                except error.Strict as e:
                    # Record the message in place of an index and stop.
                    actual_indexes.append(e.msg)
                    break
            self.assertEqual(expected_indexes, actual_indexes)

    # The UTF-8 encoding of all the characters from string_ops.SPACES.
    # See comments there about why that set of characters was chosen.
    #
    # Generated by evaluating this Python3 fragment:
    #
    # ```
    # print('\u0009\u000a\u000b\u000c\u000d\u0020\u00a0\ufeff'.encode('utf-8'))
    # ```
    ALL_WHITESPACES_UTF8 = '\t\n\x0b\x0c\r \xc2\xa0\xef\xbb\xbf'

    def test_StartsWithWhitespaceByteRange(self):
        """Check the pair returned for leading whitespace of each input."""
        CASES = [
            ((0, 0), ''),
            ((0, 0), 'x'),
            ((0, 1), ' x'),
            ((0, 1), ' x '),
            ((0, 2), '\t x '),
            ((0, 11), LibStrTest.ALL_WHITESPACES_UTF8),
        ]
        for expected, input_str in CASES:
            print()
            print('StartsWithWhitespaceByteRange case %r %r' %
                  (expected, input_str))
            self.assertEqual(
                expected, string_ops.StartsWithWhitespaceByteRange(input_str))

    def test_EndsWithWhitespaceByteRange(self):
        """Check the pair returned for trailing whitespace of each input."""
        CASES = [
            ((0, 0), ''),
            ((1, 1), 'x'),
            ((2, 2), ' x'),
            ((2, 3), ' x '),
            ((2, 4), ' x \t'),
            ((0, 11), LibStrTest.ALL_WHITESPACES_UTF8),
        ]

        for expected, input_str in CASES:
            print()
            print('EndsWithWhitespaceByteRange case %r %r' %
                  (expected, input_str))
            self.assertEqual(expected,
                             string_ops.EndsWithWhitespaceByteRange(input_str))

    def testUnarySuffixOpDemo(self):
        """Print the slices each prefix/suffix loop visits; asserts nothing."""
        print(string_ops)

        s = 'abcd'
        n = len(s)

        # All of these loops test exactly 4.
        # NOTE: These are manually copied into DoUnarySuffixOp

        print('## shortest prefix')
        for i in xrange(1, n + 1):
            print('%d test %06r return %06r' % (i, s[:i], s[i:]))
        print()

        print('# longest prefix')
        for i in xrange(n, 0, -1):
            print('%d test %06r return %06r' % (i, s[:i], s[i:]))
        print()

        print('% shortest suffix')
        for i in xrange(n - 1, -1, -1):
            print('%d test %06r return %06r' % (i, s[i:], s[:i]))
        print()

        print('%% longest suffix')
        for i in xrange(0, n):
            print('%d test %06r return %06r' % (i, s[i:], s[:i]))
        print()

    def testPatSubAllMatches(self):
        """Check _AllMatchPositions and _PatSubAll with and without matches."""
        s = 'oXooXoooX'

        # Match positions
        self.assertEqual([(1, 3), (4, 6)],
                         string_ops._AllMatchPositions(s, '(X.)'))

        # No match
        self.assertEqual([], string_ops._AllMatchPositions(s, '(z)'))

        # Replacement
        self.assertEqual('o_o_ooX', string_ops._PatSubAll(s, '(X.)', '_'))

        # Replacement with no match
        self.assertEqual(s, string_ops._PatSubAll(s, '(z)', '_'))


if __name__ == '__main__':
    # Run the tests in this module when the file is executed as a script.
    unittest.main()
